knitr::opts_chunk$set(echo = TRUE)
list.of.packages <- c("gsheet", "tidyverse", "janitor", "plotly", "glmmTMB", "metafor", "broom.mixed") #add new libraries here
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
# Load all libraries
lapply(list.of.packages, FUN = function(X) {
do.call("require", list(X))
})
sessionInfo()
NOTE: data is read directly from the GoogleSheet using a share link that was set to “anyone with a link can view”
# Read in data from GoogleSheet
data.fert <- as_tibble(gsheet2tbl('https://docs.google.com/spreadsheets/d/111SuH548Et6HDckjbQjtYk-B8A5VfNkUYZgcUNRPxxY/edit?usp=sharing'))
## Warning: Missing column names filled in: 'X22' [22]
#replace NR (not reported) with NA and convert columns to factor / numeric where needed
data.fert <- data.fert %>%
na_if("NR") %>%
mutate_at(c('Phylum', 'Study', 'Taxonomic Group', 'Common name', 'Latin name', 'Error statistic'), as.factor) %>%
mutate_at(c('pH Experimental', 'pH Control', 'pCO2 Experimental', 'pCO2 Control', 'Ave. Fert. % @ pH', 'Error % @ pH', '# Trials @ pH', 'Insemination time'), as.numeric) %>%
clean_names() # fill in spaces with underscores for column names
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
str(data.fert)
## Classes 'tbl_df', 'tbl' and 'data.frame': 334 obs. of 22 variables:
## $ phylum : Factor w/ 4 levels "Cnidaria","Crustacea",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ paper_link : chr "https://doi.org/10.1016/j.jembe.2012.12.014" "https://doi.org/10.1016/j.jembe.2012.12.014" "https://doi.org/10.1016/j.jembe.2012.12.014" "https://doi.org/10.1080/15287394.2011.550460" ...
## $ study : Factor w/ 37 levels "Albright, 2011",..: 3 3 3 4 4 5 5 6 6 6 ...
## $ taxonomic_group : Factor w/ 10 levels "abalone","clam",..: 8 8 8 5 5 8 8 2 2 2 ...
## $ common_name : Factor w/ 30 levels "amphipod","Antarctic bivalve",..: 21 21 21 7 7 11 11 2 2 2 ...
## $ latin_name : Factor w/ 41 levels "Acartia erythraea",..: 12 12 12 29 29 13 13 23 23 23 ...
## $ p_h_experimental : num 7.37 7.76 8.09 7.6 8.1 7.5 7.9 7.63 7.63 7.63 ...
## $ p_h_control : num 8.09 8.09 8.09 8.1 8.1 7.9 7.9 7.97 7.97 7.97 ...
## $ p_co2_experimental : num 3573 1386 580 1440 444 ...
## $ p_co2_control : num 580 580 580 444 444 ...
## $ ave_fert_percent_p_h : num 41 80 90 98 99 74 80 51.5 66.4 73.6 ...
## $ error_percent_p_h : num NA NA NA NA NA 4 5 1 3.1 2.7 ...
## $ error_statistic : Factor w/ 5 levels "95% CI","MAD",..: NA NA NA NA NA 3 3 4 4 4 ...
## $ number_trials_p_h : num 3 3 3 1 1 6 6 NA NA NA ...
## $ number_individuals_trial: chr "Egg and sperm pools" "Egg and sperm pools" "Egg and sperm pools" "658 eggs (pooled from individuals)" ...
## $ insemination_time : num 240 240 240 120 120 2880 2880 240 360 1440 ...
## $ sperm_per_m_l : chr NA NA NA NA ...
## $ sperm_egg_ratio : chr "10" "10" "10" NA ...
## $ number_females : chr "8" "8" "8" "6" ...
## $ number_males : chr "11" "11" "11" "8" ...
## $ other : chr "Also 120 minute egg pre-incubation in experimental water" "Also 120 minute egg pre-incubation in experimental water" "Also 120 minute egg pre-incubation in experimental water" NA ...
## $ x22 : num NA NA NA NA NA NA NA NA NA NA ...
ggplotly(data.fert %>%
ggplot(mapping=aes(x=p_h_experimental, y=ave_fert_percent_p_h, group=taxonomic_group, col=taxonomic_group, text=`common_name`)) +
geom_point(size=1.5, width=0.02) +
facet_wrap(~phylum, scale="free") +
geom_smooth(method="lm", se=TRUE, aes(fill=taxonomic_group)) +
ggtitle("Fertilization Rate ~ pH exposure by phylum, linear") +
theme_minimal())
## Warning: Ignoring unknown parameters: width
## Warning: Removed 25 rows containing non-finite values (stat_smooth).
ggplotly(data.fert %>%
ggplot(mapping=aes(x=p_h_experimental, y=ave_fert_percent_p_h, group=taxonomic_group, col=taxonomic_group, text=`common_name`)) +
geom_point(size=1, width=0.02) +
facet_wrap(~phylum, scale="free") +
geom_smooth(method="lm", se=TRUE, formula=y ~ poly(x, 2, raw=TRUE), aes(fill=taxonomic_group)) +
ggtitle("Fertilization Rate ~ pH exposure by phylum, polynomial") +
theme_minimal())
## Warning: Ignoring unknown parameters: width
## Warning: Removed 25 rows containing non-finite values (stat_smooth).
## Warning in predict.lm(model, newdata = new_data_frame(list(x = xseq)), se.fit =
## se, : prediction from a rank-deficient fit may be misleading
Upper 95% CI = Mean + 1.96*SE; I recorded the difference between the upper 95%CI and the mean (Upper 95%CI - Mean). To convert I will use:
SE = (Upper 95%CI - Mean) / 1.96
SD = ((Upper 95%CI - Mean) / 1.96) * sqrt(n)
SE= SD/sqrt(n); where n=sample size. To convert I will use that equation (since I recorded sample size, i.e. number of trials at each pH)
SD = SE*sqrt(n)
I will use that statistic as-is (no conversion), thereby assuming it is SE.
data.fert <- data.fert %>%
mutate(SE = case_when(error_statistic == "SD" ~ error_percent_p_h/sqrt(number_trials_p_h),
error_statistic == "95% CI" ~ error_percent_p_h/1.96,
is.na(error_statistic) ~ error_percent_p_h,
error_statistic == "SE" ~ error_percent_p_h))
data.fert <- data.fert %>%
mutate(SD = case_when(error_statistic == "SE" ~ error_percent_p_h*sqrt(number_trials_p_h),
error_statistic == "95% CI" ~ (error_percent_p_h/1.96)*sqrt(number_trials_p_h),
is.na(error_statistic) ~ error_percent_p_h,
error_statistic == "SD" ~ error_percent_p_h))
data.fert %>% View()
data.fert <- data.fert %>%
mutate(pH_delta = p_h_control-p_h_experimental)
data.fert <- data.fert %>%
mutate(ave_fert_proport = case_when(ave_fert_percent_p_h <= 100 ~ ave_fert_percent_p_h/100,
ave_fert_percent_p_h > 100 ~ 1))
Transormation equation source: https://cran.r-project.org/web/packages/betareg/vignettes/betareg.pdf, " … if y also assumes the extremes 0 and 1, a useful transformation in practice is (y · (n − 1) + 0.5)/n where n is the sample size (Smithson and Verkuilen 2006)."
issue is that a few studies only had 1 trial per pH, so the transformation results in NA values
# data.fert$ave_fert_proport.t <- data.fert$ave_fert_proport*((data.fert$number_trials_p_h-1) + 0.5) / data.fert$number_trials_p_h
data.fert$ave_fert_proport.t <- data.fert$ave_fert_proport - 0.001
data.fert$ave_fert_proport.t %>% hist()
ggplot(data.fert, aes(group=phylum, col=phylum)) + geom_density(aes(ave_fert_proport.t))
## Warning: Removed 13 rows containing non-finite values (stat_density).
weights <- metafor::escalc(measure='MN',
mi=data.fert$ave_fert_percent_p_h,
sdi = data.fert$SD,
ni=data.fert$number_trials_p_h, options(na.action="na.pass"))
test1 <- glmmTMB(ave_fert_proport ~ p_h_experimental + taxonomic_group/phylum + (1|study), data=data.fert, binomial(link = "logit"), na.action=na.exclude)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in fitTMB(TMBStruc): Model convergence problem; non-positive-definite
## Hessian matrix. See vignette('troubleshooting')
test2 <- glmmTMB(ave_fert_proport ~ p_h_experimental*phylum + (1|study), data=data.fert, binomial(link = "logit"), na.action=na.exclude)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
test3 <- glmmTMB(ave_fert_proport ~ p_h_experimental:phylum + (1|study), data=data.fert, binomial(link = "logit"), na.action=na.exclude)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
test4 <- glmmTMB(ave_fert_proport ~ p_h_experimental + phylum + (1|study), data=data.fert, binomial(link = "logit"), na.action=na.exclude)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
test5 <- glmmTMB(ave_fert_proport ~ p_h_experimental + (1|study), data=data.fert, binomial(link = "logit"), na.action=na.exclude)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
test6 <- glmmTMB(ave_fert_proport ~ (1|study), data=data.fert, binomial(link = "logit"), na.action=na.exclude)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
test7 <- glmmTMB(ave_fert_proport ~ phylum + (1|study), data=data.fert, binomial(link = "logit"), na.action=na.exclude)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
test8 <- glmmTMB(ave_fert_proport ~ p_h_experimental, data=data.fert, binomial(link = "logit"), na.action=na.exclude)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC(test1, test2, test3, test4, test5, test6, test7, test8) #test2 smallest AIC.
## Warning in AIC.default(test1, test2, test3, test4, test5, test6, test7, : models
## are not all fitted to the same number of observations
## df AIC
## test1 42 NA
## test2 9 373.7304
## test3 6 369.9990
## test4 6 370.1405
## test5 3 368.4748
## test6 2 404.6719
## test7 5 406.9653
## test8 2 393.3454
#test differene between models test3 and test4. Stick with test5 model.
anova(test5, test4)
## Data: data.fert
## Models:
## test5: ave_fert_proport ~ p_h_experimental + (1 | study), zi=~0, disp=~1
## test4: ave_fert_proport ~ p_h_experimental + phylum + (1 | study), zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## test5 3 368.47 379.67 -181.24 362.47
## test4 6 370.14 392.54 -179.07 358.14 4.3343 3 0.2276
anova(test5, test3)
## Data: data.fert
## Models:
## test5: ave_fert_proport ~ p_h_experimental + (1 | study), zi=~0, disp=~1
## test3: ave_fert_proport ~ p_h_experimental:phylum + (1 | study), zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## test5 3 368.47 379.67 -181.24 362.47
## test3 6 370.00 392.40 -179.00 358.00 4.4758 3 0.2145
anova(test5, test2)
## Data: data.fert
## Models:
## test5: ave_fert_proport ~ p_h_experimental + (1 | study), zi=~0, disp=~1
## test2: ave_fert_proport ~ p_h_experimental * phylum + (1 | study), zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## test5 3 368.47 379.67 -181.24 362.47
## test2 9 373.73 407.33 -177.87 355.73 6.7444 6 0.3451
anova(test8, test5)
## Data: data.fert
## Models:
## test8: ave_fert_proport ~ p_h_experimental, zi=~0, disp=~1
## test5: ave_fert_proport ~ p_h_experimental + (1 | study), zi=~0, disp=~1
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## test8 2 393.35 400.81 -194.67 389.35
## test5 3 368.47 379.67 -181.24 362.47 26.871 1 2.175e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Examine model test5
car::Anova(test5) #phylum not quite significant factor, pH is a sign. factor.
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: ave_fert_proport
## Chisq Df Pr(>Chisq)
## p_h_experimental 17.098 1 3.549e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(test5)
## Family: binomial ( logit )
## Formula: ave_fert_proport ~ p_h_experimental + (1 | study)
## Data: data.fert
##
## AIC BIC logLik deviance df.resid
## 368.5 379.7 -181.2 362.5 306
##
## Random effects:
##
## Conditional model:
## Groups Name Variance Std.Dev.
## study (Intercept) 0.8953 0.9462
## Number of obs: 309, groups: study, 35
##
## Conditional model:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -13.0110 3.3241 -3.914 9.07e-05 ***
## p_h_experimental 1.7928 0.4336 4.135 3.55e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Generate estimates & confidence intervals (log likelihood)
confint(test5)
## 2.5 % 97.5 % Estimate
## cond.(Intercept) -19.5261217 -6.495904 -13.0110131
## cond.p_h_experimental 0.9430309 2.642588 1.7928096
## study.cond.Std.Dev.(Intercept) 0.6096868 1.468417 0.9461896
# Instpect residuals ~ fitted values
aa5 <- augment(test5, data=data.fert)
## Warning in mr - fitted(object): longer object length is not a multiple of
## shorter object length
ggplotly(ggplot(aa5, aes(x=.fitted,y=.resid)) +
geom_point() +
geom_smooth())
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 25 rows containing non-finite values (stat_smooth).
ph.min.max <- data.fert %>%
select(phylum, p_h_experimental) %>%
group_by(phylum) %>%
summarize(min=min(p_h_experimental, na.rm=TRUE), max=max(p_h_experimental, na.rm=TRUE))
phylum.list <- list()
for (i in 1:nrow(ph.min.max)) {
phylum.list[[i]] <- data.frame(ph=c(seq(from=as.numeric(ph.min.max[i,"min"]),
to=as.numeric(ph.min.max[i,"max"]),
by=0.01)),
phylum=rep(c(ph.min.max[i,"phylum"])))
}
new.data <- bind_rows(phylum.list) %>% purrr::set_names(c("p_h_experimental", "phylum"))
new.data$study <- NA
predict.test.df <- predict(test5, newdata = new.data, se.fit = TRUE, type="response")
predict.test.df.df <- predict.test.df %>%
as.data.frame() %>%
cbind(new.data)
#scales::show_col(c("#e41a1c","#4daf4a","#ff7f00","#984ea3",'#377eb8'))
# Data with beta regression model fit
ggplotly(ggplot() +
geom_jitter(data=data.fert, aes(x=p_h_experimental, y=ave_fert_proport, group=phylum, col=phylum), size=1.2, width=0.03) +
#facet_wrap(~phylum, scales="free") + theme_minimal() +
ggtitle("% fertilization ~ pH with binomial-regression model predictions") +
xlab("Experimental pH") + ylab("Fertilization %") +
scale_color_manual(name=NULL, values=c("#e41a1c","#ff7f00","#4daf4a",'#377eb8')) +
theme_minimal() +
coord_cartesian(ylim = c(0, 1), xlim =c(6,8.5)) +
geom_line(data = predict.test.df.df, aes(x=p_h_experimental, y=fit), col="gray50") + #, col=phylum
geom_ribbon(data = predict.test.df.df, aes(x=p_h_experimental, ymin=fit-se.fit, ymax=fit+se.fit), linetype=2, alpha=0.1, col="gray50")) #, fill=phylum